#https://datatables.net/reference/option/
# Global defaults applied to every DT::datatable() in this document.
# The DataTables option that toggles pagination is `paging`; the original
# key `pagin` is not a DataTables option name.
options(
  DT.options = list(
    scrollX = TRUE,
    paging = TRUE,
    fixedHeader = TRUE,
    searchHighlight = TRUE
  )
)
| Name | train |
| Number of rows | 56375 |
| Number of columns | 31 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| factor | 13 |
| numeric | 17 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| reservation_status_date | 0 | 1 | 2015-07-01 | 2017-09-14 | 2016-09-03 | 804 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| agent | 0 | 1 | FALSE | 301 | 9: 13983, NUL: 9281, 240: 6342, 7: 2309 |
| arrival_date_month | 0 | 1 | FALSE | 12 | Aug: 6557, Jul: 5965, May: 5255, Oct: 5176 |
| assigned_room_type | 0 | 1 | FALSE | 10 | A: 30769, D: 14227, E: 4412, F: 2125 |
| company | 0 | 1 | FALSE | 313 | NUL: 52139, 40: 652, 223: 501, 45: 170 |
| country | 0 | 1 | FALSE | 153 | PRT: 15728, GBR: 7254, FRA: 6395, ESP: 4793 |
| customer_type | 0 | 1 | FALSE | 4 | Tra: 39897, Tra: 14023, Con: 2065, Gro: 390 |
| deposit_type | 0 | 1 | FALSE | 3 | No : 56196, Ref: 103, Non: 76 |
| distribution_channel | 0 | 1 | FALSE | 4 | TA/: 43271, Dir: 9061, Cor: 3918, GDS: 125 |
| hotel | 0 | 1 | FALSE | 2 | Cit: 34668, Res: 21707 |
| market_segment | 0 | 1 | FALSE | 7 | Onl: 26891, Off: 11858, Dir: 7990, Gro: 5756 |
| meal | 0 | 1 | FALSE | 5 | BB: 43371, HB: 7049, SC: 5072, Und: 644 |
| reservation_status | 0 | 1 | FALSE | 1 | Che: 56375, Can: 0, No-: 0 |
| reserved_room_type | 0 | 1 | FALSE | 9 | A: 39271, D: 9773, E: 3469, F: 1537 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| adr | 0 | 1 | 99.96 | 49.20 | 0 | 67.39 | 92.65 | 125 | 508 | ▇▅▁▁▁ |
| adults | 0 | 1 | 1.83 | 0.51 | 0 | 2.00 | 2.00 | 2 | 4 | ▁▂▇▁▁ |
| arrival_date_day_of_month | 0 | 1 | 15.85 | 8.78 | 1 | 8.00 | 16.00 | 23 | 31 | ▇▇▇▇▆ |
| arrival_date_week_number | 0 | 1 | 27.12 | 13.90 | 1 | 16.00 | 28.00 | 38 | 53 | ▆▇▇▇▆ |
| arrival_date_year | 0 | 1 | 2016.15 | 0.70 | 2015 | 2016.00 | 2016.00 | 2017 | 2017 | ▃▁▇▁▆ |
| babies | 0 | 1 | 0.01 | 0.11 | 0 | 0.00 | 0.00 | 0 | 10 | ▇▁▁▁▁ |
| booking_changes | 0 | 1 | 0.29 | 0.73 | 0 | 0.00 | 0.00 | 0 | 21 | ▇▁▁▁▁ |
| children | 0 | 1 | 0.10 | 0.39 | 0 | 0.00 | 0.00 | 0 | 3 | ▇▁▁▁▁ |
| days_in_waiting_list | 0 | 1 | 1.58 | 14.83 | 0 | 0.00 | 0.00 | 0 | 379 | ▇▁▁▁▁ |
| is_repeated_guest | 0 | 1 | 0.04 | 0.20 | 0 | 0.00 | 0.00 | 0 | 1 | ▇▁▁▁▁ |
| lead_time | 0 | 1 | 79.89 | 91.04 | 0 | 9.00 | 45.00 | 124 | 737 | ▇▂▁▁▁ |
| previous_bookings_not_canceled | 0 | 1 | 0.20 | 1.82 | 0 | 0.00 | 0.00 | 0 | 72 | ▇▁▁▁▁ |
| previous_cancellations | 0 | 1 | 0.02 | 0.28 | 0 | 0.00 | 0.00 | 0 | 13 | ▇▁▁▁▁ |
| required_car_parking_spaces | 0 | 1 | 0.10 | 0.30 | 0 | 0.00 | 0.00 | 0 | 8 | ▇▁▁▁▁ |
| stays_in_week_nights | 0 | 1 | 2.46 | 1.92 | 0 | 1.00 | 2.00 | 3 | 50 | ▇▁▁▁▁ |
| stays_in_weekend_nights | 0 | 1 | 0.93 | 0.99 | 0 | 0.00 | 1.00 | 2 | 19 | ▇▁▁▁▁ |
| total_of_special_requests | 0 | 1 | 0.72 | 0.84 | 0 | 0.00 | 1.00 | 1 | 5 | ▇▁▁▁▁ |
# make arrival date col
# Assemble a Date from the three arrival_* parts; the month name is turned
# into a month number by looking it up in base R's `month.name`.
train <- train %>%
  mutate(
    arrival.date = make_date(
      year = arrival_date_year,
      month = match(arrival_date_month, month.name),
      day = arrival_date_day_of_month
    )
  )
# these numeric vars s/b factor vars
# across() replaces the superseded mutate_at(); the same four columns are
# converted with the same function.
train <- train %>%
  mutate(
    across(
      c(
        arrival_date_day_of_month,
        arrival_date_week_number,
        arrival_date_year,
        is_repeated_guest
      ),
      factor
    )
  )
# reordering df
# Columns are first sorted by name, then regrouped by type (Date, factor,
# numeric). Only columns matched by one of the three predicates are kept.
train <- train %>%
  select(sort(tidyselect::peek_vars())) %>%
  select(
    where(is.Date),
    where(is.factor),
    where(is.numeric)
  )
Note: not a true time series in that the arrival month is a factor
# Print a one-line message with the earliest and latest arrival date.
# range() is evaluated once; `.` inside the braces is its two-element result.
train %>%
  pull(arrival.date) %>%
  range() %>%
  {
    paste(
      "The date range of this dataset is from",
      .[1],
      "to",
      .[2]
    )
  }
## [1] "The date range of this dataset is from 2015-07-01 to 2017-08-31"
# Scatter of the summed adults + children per arrival date and hotel.
# NOTE(review): rows are grouped by hotel, but hotel is not mapped to any
# plot attribute, so both hotels share one trace — confirm this is intended.
train %>%
  group_by(arrival.date, hotel) %>%
  summarise(total.bookings = sum(adults, children)) %>%
  arrange(arrival.date) %>%
  plot_ly(x = ~arrival.date, y = ~total.bookings) %>%
  layout(
    title = "total.bookings by date",
    # axis titles are blanked on purpose; the plot title carries the meaning
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Number of rows (bookings) per arrival date, one coloured trace per hotel.
train %>%
  group_by(arrival.date, hotel) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~hotel,
    # Was `alphtrain = 0.7`, which plotly rejects with the
    # "'scatter' objects don't have these attributes" warning recorded after
    # this chunk; the transparency argument of plot_ly() is `alpha`.
    alpha = 0.7
  ) %>%
  layout(
    title = "total.bookings by date/hotel",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Number of rows (bookings) per arrival date, one coloured trace per
# customer type.
train %>%
  group_by(arrival.date, customer_type) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~customer_type,
    # Was `alphtrain = 0.7`, which is not a valid attribute (see the warning
    # recorded after this chunk); plot_ly()'s transparency argument is `alpha`.
    alpha = 0.7
  ) %>%
  layout(
    title = "total.bookings by date/customer_type",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Number of rows (bookings) per arrival date, one coloured trace per
# deposit type.
train %>%
  group_by(arrival.date, deposit_type) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~deposit_type,
    # Was `alphtrain = 0.7`, which is not a valid attribute (see the warning
    # recorded after this chunk); plot_ly()'s transparency argument is `alpha`.
    alpha = 0.7
  ) %>%
  layout(
    title = "total.bookings by date/deposit_type",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Number of rows (bookings) per arrival date, one coloured trace per
# distribution channel.
train %>%
  group_by(arrival.date, distribution_channel) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~distribution_channel,
    # Was `alphtrain = 0.7`, which is not a valid attribute (see the warning
    # recorded after this chunk); plot_ly()'s transparency argument is `alpha`.
    alpha = 0.7
  ) %>%
  layout(
    title = "total.bookings by date/distribution_channel",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Rows: 56,375
## Columns: 17
## $ agent <fct> NULL, NULL, 304, 240, 240, 303, 240, 241,...
## $ arrival_date_day_of_month <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ arrival_date_month <fct> July, July, July, July, July, July, July,...
## $ arrival_date_week_number <fct> 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 2...
## $ arrival_date_year <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015,...
## $ assigned_room_type <fct> C, C, A, A, A, C, E, G, E, E, E, E, G, F,...
## $ company <fct> NULL, NULL, NULL, NULL, NULL, NULL, NULL,...
## $ country <fct> PRT, PRT, GBR, GBR, GBR, PRT, USA, ESP, P...
## $ customer_type <fct> Transient, Transient, Transient, Transien...
## $ deposit_type <fct> No Deposit, No Deposit, No Deposit, No De...
## $ distribution_channel <fct> Direct, Direct, Corporate, TA/TO, TA/TO, ...
## $ hotel <fct> Resort Hotel, Resort Hotel, Resort Hotel,...
## $ is_repeated_guest <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ market_segment <fct> Direct, Direct, Corporate, Online TA, Onl...
## $ meal <fct> BB, BB, BB, BB, BB, FB, BB, HB, BB, BB, B...
## $ reservation_status <fct> Check-Out, Check-Out, Check-Out, Check-Ou...
## $ reserved_room_type <fct> C, C, A, A, A, C, D, G, E, D, E, A, A, F,...
# 15 colours interpolated from the 8-colour RColorBrewer "Dark2" palette;
# reused as the colour scale by later plots.
jpal <- colorRampPalette(brewer.pal(8, "Dark2"))(15)
# Horizontal bar chart: one bar per factor column, bar length = the value
# n_unique() returns for that column.
train %>%
  select(where(is.factor)) %>%
  map(n_unique) %>%
  # as.tibble() is deprecated since tibble 2.0.0 (see the warning recorded
  # after this chunk); as_tibble() is its replacement.
  as_tibble() %>%
  pivot_longer(everything()) %>%
  plot_ly(y = ~name, x = ~value, color = ~name, colors = jpal) %>%
  add_bars() %>%
  hide_legend() %>%
  layout(
    title = "distribution of level counts per factor",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## $agent
## [1] NULL 304 240 303 241 8 250 5 175 134 156 243 242 115 105
## [16] 40 147 184 96 2 127 95 177 6 15 305 67 196 152 142
## [31] 171 36 104 261 306 149 26 258 71 146 181 88 143 251 275
## [46] 248 208 69 110 256 314 126 281 253 185 330 334 328 326 324
## [61] 321 313 38 155 68 335 308 332 387 298 273 315 307 75 201
## [76] 183 223 94 3 446 468 9 34 327 139 436 270 339 47 128
## [91] 154 114 29 301 245 244 193 1 16 336 135 350 195 352 355
## [106] 348 10 168 363 384 360 375 66 331 91 64 385 78 393 406
## [121] 249 405 163 414 333 11 427 431 430 426 438 433 418 441 72
## [136] 450 434 454 455 368 451 57 180 358 464 411 481 469 165 254
## [151] 467 510 531 440 337 526 493 502 527 479 410 508 535 302 497
## [166] 187 429 13 7 27 17 28 14 42 20 19 37 61 22 39
## [181] 21 24 30 50 52 12 44 31 83 32 63 56 89 159 86
## [196] 79 132 45 4 82 81 74 92 99 85 87 112 117 106 98
## [211] 111 119 148 151 138 121 158 167 144 118 153 211 210 129 213
## [226] 174 220 173 216 232 35 23 58 205 157 133 150 214 290 192
## [241] 191 267 215 252 247 278 280 285 289 269 295 288 122 294 325
## [256] 234 341 310 344 77 103 346 359 283 364 370 33 371 25 179
## [271] 53 227 141 378 391 397 404 299 73 354 444 296 461 390 388
## [286] 453 425 394 262 459 474 229 475 480 423 484 495 219 476 509
## [301] 449
## 334 Levels: 1 10 103 104 105 106 107 11 110 111 112 114 115 117 118 119 ... NULL
##
## $arrival_date_day_of_month
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31
## 31 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 31
##
## $arrival_date_month
## [1] July August September October November December January
## [8] February March April May June
## 12 Levels: April August December February January July June March ... September
##
## $arrival_date_week_number
## [1] 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## [26] 52 53 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [51] 24 25 26
## 53 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 53
##
## $arrival_date_year
## [1] 2015 2016 2017
## Levels: 2015 2016 2017
##
## $assigned_room_type
## [1] C A E G F D B H I K
## Levels: A B C D E F G H I K L P
##
## $company
## [1] NULL 110 270 240 154 144 307 268 59 312 318 174 274 113 195
## [16] 223 317 281 118 53 286 12 324 342 371 47 331 178 405 337
## [31] 94 528 62 51 120 42 82 81 116 530 103 204 112 135 9
## [46] 39 16 92 31 61 356 457 501 86 165 291 292 290 43 325
## [61] 192 108 34 224 388 269 465 287 297 490 207 169 282 20 437
## [76] 263 225 329 272 28 482 200 338 83 72 246 319 159 380 323
## [91] 511 407 421 88 278 80 403 399 84 137 343 346 347 289 351
## [106] 355 54 99 250 358 361 390 362 366 372 365 277 109 14 377
## [121] 379 22 378 330 364 401 232 384 167 212 514 391 400 376 392
## [136] 402 396 302 370 367 397 369 409 251 168 428 382 408 413 148
## [151] 10 333 360 415 422 395 435 442 445 448 443 454 444 394 52
## [166] 459 458 456 353 254 460 447 470 255 466 184 485 32 491 494
## [181] 193 516 496 499 308 29 78 146 504 130 520 507 506 498 515
## [196] 512 126 64 242 477 518 521 523 539 436 525 541 40 455 410
## [211] 45 38 49 67 68 65 91 8 221 46 76 96 100 115 105
## [226] 101 93 11 139 142 127 107 140 143 163 149 150 180 238 219
## [241] 186 179 183 222 153 197 203 185 217 209 215 230 35 216 227
## [256] 245 218 158 259 260 411 257 271 18 106 275 210 273 71 284
## [271] 301 233 305 293 264 311 304 313 288 320 334 314 332 341 349
## [286] 350 73 383 368 393 220 412 420 426 417 243 429 433 446 450
## [301] 418 424 280 357 483 439 489 229 486 481 497 451 492
## 353 Levels: 10 100 101 102 103 104 105 106 107 108 109 11 110 112 113 ... NULL
##
## $country
## [1] PRT GBR USA ESP IRL FRA NULL ROU NOR OMN ARG POL DEU BEL CN
## [16] CHE ITA NLD DNK SWE AUS EST CZE BRA FIN MOZ BWA LUX RUS ALB
## [31] IND CHN MAR SVN UKR LVA BLR LTU TUR MEX AGO ISR CHL CYM ZMB
## [46] AUT ZWE DZA CRI KOR HUN HRV CYP NZL KAZ THA COL DOM MKD PRI
## [61] MYS GRC NGA VEN GIB JPN LKA ZAF CMR IRN BIH MUS COM SUR CUB
## [76] BGR CIV JOR SYR SGP BDI KWT URY LBN AZE ARE QAT EGY PER SVK
## [91] CPV MDV SRB MLT MWI ECU MDG IDN ISL CAF JAM UZB NPL BHS PAK
## [106] TGO TWN HKG DJI VNM PHL GEO TUN SEN SAU ETH IRQ LIE MMR PAN
## [121] TMP BFA ARM KEN MCO GNB LBY TZA BGD NAM BOL SYC PRY BRB ABW
## [136] AIA SLV DMA GAB PYF UGA GUY LCA MNE GTM GHA ASM NCL STP KIR
## [151] TJK LAO FRO
## 178 Levels: ABW AGO AIA ALB AND ARE ARG ARM ASM ATA ATF AUS AUT AZE BDI ... ZWE
##
## $customer_type
## [1] Transient Contract Transient-Party Group
## Levels: Contract Group Transient Transient-Party
##
## $deposit_type
## [1] No Deposit Refundable Non Refund
## Levels: No Deposit Non Refund Refundable
##
## $distribution_channel
## [1] Direct Corporate TA/TO GDS
## Levels: Corporate Direct GDS TA/TO Undefined
##
## $hotel
## [1] Resort Hotel City Hotel
## Levels: City Hotel Resort Hotel
##
## $is_repeated_guest
## [1] 0 1
## Levels: 0 1
##
## $market_segment
## [1] Direct Corporate Online TA Offline TA/TO Complementary
## [6] Groups Aviation
## 8 Levels: Aviation Complementary Corporate Direct Groups ... Undefined
##
## $meal
## [1] BB FB HB SC Undefined
## Levels: BB FB HB SC Undefined
##
## $reservation_status
## [1] Check-Out
## Levels: Canceled Check-Out No-Show
##
## $reserved_room_type
## [1] C A D G E F H L B
## Levels: A B C D E F G H L P
## Rows: 56,375
## Columns: 13
## $ adr <dbl> 0.00, 0.00, 75.00, 98.00, 98.00, 103...
## $ adults <dbl> 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ babies <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ booking_changes <dbl> 3, 4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
## $ children <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
## $ days_in_waiting_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ lead_time <dbl> 342, 737, 13, 14, 14, 9, 68, 18, 37,...
## $ previous_bookings_not_canceled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ previous_cancellations <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ required_car_parking_spaces <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ stays_in_week_nights <dbl> 0, 0, 1, 2, 2, 2, 4, 4, 4, 4, 4, 1, ...
## $ stays_in_weekend_nights <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ total_of_special_requests <dbl> 0, 0, 0, 1, 1, 1, 3, 1, 0, 3, 0, 0, ...
# Upper-tail quantiles of a numeric vector.
#
# col:   numeric vector to summarise.
# probs: probabilities passed to quantile(); defaults to the 90th, 95th,
#        99th and 100th percentiles, which is what the original
#        one-argument version always returned.
# Returns the named numeric vector produced by quantile().
jquantiles <- function(col, probs = c(0.90, 0.95, 0.99, 1)) {
  quantile(col, probs = probs)
}
# Table of upper-tail quantiles: one column per numeric variable, one row
# per quantile returned by jquantiles(). Rows with any NA are dropped first.
train %>%
  na.omit() %>%
  select(where(is.numeric)) %>%
  map(jquantiles) %>%
  # generic as.data.frame() dispatches to the list method that was
  # previously called directly
  as.data.frame() %>%
  rownames_to_column() %>%
  # as.tibble() is deprecated since tibble 2.0.0; as_tibble() replaces it
  as_tibble()
#with outliers
# Histograms of every numeric column, unfiltered; two panels per page.
train %>%
  select(where(is.numeric)) %>%
  DataExplorer::plot_histogram(
    nrow = 2,
    ncol = 1
  )
#no outliers
# Histograms of the numeric columns after dropping rows that hold one of the
# hard-coded extreme values below.
# NOTE(review): the skim summary of `train` reports maxima of 508 (adr),
# 4 (adults), 3 (children), 379 (days_in_waiting_list), 737 (lead_time) and
# 13 (previous_cancellations); the matching cut-offs here presumably come
# from a different data set and may not remove anything — confirm.
train %>%
  select(where(is.numeric)) %>%
  filter(
    adr != 5400,
    adults != 55,
    babies != 10,
    booking_changes != 21,
    children != 10,
    days_in_waiting_list != 391,
    lead_time != 709,
    previous_bookings_not_canceled != 72,
    previous_cancellations != 26,
    required_car_parking_spaces != 8,
    stays_in_week_nights != 50,
    stays_in_weekend_nights != 19
  ) %>%
  DataExplorer::plot_histogram(
    nrow = 2,
    ncol = 1
  )
#no outliers
# Box plots of the numeric columns split by hotel, after dropping rows that
# hold one of the hard-coded extreme values below.
# NOTE(review): the skim summary of `train` reports maxima of 508 (adr),
# 4 (adults), 3 (children), 379 (days_in_waiting_list), 737 (lead_time) and
# 13 (previous_cancellations); the matching cut-offs here presumably come
# from a different data set and may not remove anything — confirm.
train %>%
  select(hotel, where(is.numeric)) %>%
  filter(
    adr != 5400,
    adults != 55,
    babies != 10,
    booking_changes != 21,
    children != 10,
    days_in_waiting_list != 391,
    lead_time != 709,
    previous_bookings_not_canceled != 72,
    previous_cancellations != 26,
    required_car_parking_spaces != 8,
    stays_in_week_nights != 50,
    stays_in_weekend_nights != 19
  ) %>%
  DataExplorer::plot_boxplot(
    by = "hotel",
    nrow = 3,
    ncol = 1
  )
#looks like one outlier in adr is changing the scale, making it hard to see the true distribution --remove outlier
# Smallest and largest value of the adr column.
# NOTE(review): `a` is not defined in the visible part of this file, while the
# surrounding chunks operate on `train` — confirm which data frame is meant.
a$adr %>% range
## [1] -6.38 510.00
# Horizontal box plot of adr, one box per hotel, using the first two colours
# of jpal. Rows with adr equal to 5400 are excluded first.
train %>%
  filter(adr != 5400) %>%
  select(hotel, adr) %>%
  plot_ly(
    y = ~hotel,
    x = ~adr,
    color = ~hotel,
    colors = jpal[1:2]
  ) %>%
  add_boxplot()
#https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
# Horizontal box plots of adr per hotel, with one box per customer type
# placed side by side inside each hotel.
train %>%
  filter(adr != 5400) %>%
  select(hotel, adr, customer_type) %>%
  # The former `group = ~customer_type` argument is deprecated in plot_ly()
  # (see the warning recorded after this chunk); `color` already produces one
  # trace per customer type.
  plot_ly(
    y = ~hotel,
    x = ~adr,
    color = ~customer_type,
    colors = jpal
  ) %>%
  add_boxplot() %>%
  layout(
    boxmode = "group", #SUPER IMPORTANT
    title = "ADR by Hotel/customer_type"
  )
## Warning in plot_ly(., y = ~hotel, x = ~adr, color = ~customer_type, colors = jpal, : The group argument has been deprecated. Use `group_by()` or split instead.
## See `help('plotly_data')` for examples
## Warning: 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## 'font', 'title', 'uniformtext', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
#https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
# Horizontal box plots of adr per hotel, with one box per market segment
# placed side by side inside each hotel.
train %>%
  filter(adr != 5400) %>%
  select(hotel, adr, market_segment) %>%
  # The former `group = ~market_segment` argument is deprecated in plot_ly()
  # (see the warning recorded after this chunk); `color` already produces one
  # trace per market segment.
  plot_ly(
    y = ~hotel,
    x = ~adr,
    color = ~market_segment,
    colors = jpal
  ) %>%
  add_boxplot() %>%
  layout(
    boxmode = "group", #SUPER IMPORTANT
    title = "ADR by Hotel/market_segment"
  )
## Warning in plot_ly(., y = ~hotel, x = ~adr, color = ~market_segment, colors = jpal, : The group argument has been deprecated. Use `group_by()` or split instead.
## See `help('plotly_data')` for examples
## Warning in plot_ly(., y = ~hotel, x = ~adr, color = ~market_segment, colors = jpal, : 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## 'font', 'title', 'uniformtext', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
#https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
# Box plots of ADR per hotel, one box per arrival month.
# Rows with adr == 5400 are excluded (presumably a single extreme outlier -- confirm).
train %>%
  filter(adr != 5400) %>%
  select(hotel, adr, arrival_date_month) %>%
  # Mapping `color` to arrival_date_month is what yields one box per month.
  # The former `group = ~arrival_date_month` argument is deprecated: plot_ly()
  # discarded it and only raised a warning, so it is removed here.
  plot_ly(x = ~hotel, y = ~adr, color = ~arrival_date_month, colors = jpal) %>%
  add_boxplot() %>%
  layout(
    # Draw the month boxes next to each other within each hotel.
    # NOTE(review): R plotly still warns that 'boxmode' is not a layout
    # attribute; this looks like a validation false positive -- confirm.
    boxmode = 'group',
    title = 'ADR by Hotel/arrival_date_month'
  )
## Warning in plot_ly(., x = ~hotel, y = ~adr, color = ~arrival_date_month, : 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## 'font', 'title', 'uniformtext', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
## Warning: package 'DescTools' was built under R version 4.0.3
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
# Median ADR per arrival month and hotel, drawn as dodged bars and made
# interactive with ggplotly().
ggplotly(
  train %>%
    filter(adr != 5400) %>%
    mutate(
      # Abbreviate the month names and put them in calendar order using base R's
      # month.name / month.abb, instead of borrowing level labels from another
      # data frame. Assumes the column holds full English month names -- confirm.
      arrival_date_month = factor(arrival_date_month, levels = month.name, labels = month.abb)
    ) %>%
    group_by(arrival_date_month, hotel) %>%
    # .groups = 'drop' returns an ungrouped summary and silences the
    # "regrouping output" message.
    summarise(med.adr = median(adr, na.rm = TRUE), .groups = 'drop') %>%
    ggplot(aes(arrival_date_month, med.adr, fill = hotel)) +
    geom_col(position = 'dodge') +
    scale_fill_manual(values = c('blue4', 'darkorange'))
) %>%
  layout(
    title = 'Median ADR by Hotel/Month'
  )
## == Use anomalize to improve your Forecasts by 50%! ==
## Business Science offers a 1-hour course - Lab #18: Time Series Anomaly Detection!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
# time_decompose(data, target, method = c("stl", "twitter"), frequency = "auto", trend = "auto", ..., merge = FALSE, message = TRUE)
# anomalize(data, target, method = c("iqr", "gesd"), alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
# The alpha parameter adjusts the width of the critical values. By default, alpha = 0.05.
# Lower values are more conservative while higher values are less prone to incorrectly classifying "normal" observations.
# max_anoms: The maximum percent of anomalies permitted to be identified.
# The STL method uses the stl() function from the stats package. STL works very well when a long-term trend is present (which applies in this case; see the trend component in the prophet graphs below).
#use full data set, filter to hotel type, arrange by date
# Add a real Date column assembled from the separate year, month-name and
# day-of-month columns of the full data set.
a1 = mutate(
  a,
  arrival.date = make_date(
    year  = arrival_date_year,
    month = match(arrival_date_month, month.name), # month name -> month number
    day   = arrival_date_day_of_month
  )
)
# Resort hotel: sum adults + children per arrival date, decompose the series
# with STL and flag anomalies in the remainder.
# The outer parentheses print the result in addition to assigning it.
(anomaly.hotel.resort = a1 %>%
  filter(hotel == 'Resort Hotel') %>%
  group_by(arrival.date, hotel) %>%
  # .groups = 'drop' hands an ungrouped table to the next steps and silences
  # the "regrouping output" message.
  summarise(total.bookings = sum(adults, children), .groups = 'drop') %>%
  select(arrival.date, hotel, total.bookings) %>%
  arrange(arrival.date) %>%
  as_tibble() %>% # as.tibble() is deprecated in favour of as_tibble()
  time_decompose(total.bookings, method = 'stl', merge = TRUE) %>%
  anomalize(remainder, alpha = 0.15, method = 'gesd') %>% # increasing sensitivity to outliers
  time_recompose())
## Converting from tbl_df to tbl_time.
## Auto-index message: index = arrival.date
## frequency = 7 days
## trend = 91 days
# City hotel: sum adults + children per arrival date, decompose the series
# with STL and flag anomalies in the remainder.
# The outer parentheses print the result in addition to assigning it.
(anomaly.hotel.city = a1 %>%
  filter(hotel == 'City Hotel') %>%
  group_by(arrival.date, hotel) %>%
  # .groups = 'drop' hands an ungrouped table to the next steps and silences
  # the "regrouping output" message.
  summarise(total.bookings = sum(adults, children), .groups = 'drop') %>%
  select(arrival.date, hotel, total.bookings) %>%
  arrange(arrival.date) %>%
  as_tibble() %>% # as.tibble() is deprecated in favour of as_tibble()
  time_decompose(total.bookings, method = 'stl', merge = TRUE) %>%
  anomalize(remainder, alpha = 0.15, method = 'gesd') %>% # increasing sensitivity to outliers
  time_recompose())
## Converting from tbl_df to tbl_time.
## Auto-index message: index = arrival.date
## frequency = 7 days
## trend = 91 days
## Loading required package: Rcpp
##
## Attaching package: 'Rcpp'
## The following object is masked from 'package:rsample':
##
## populate
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
## The following object is masked from 'package:data.table':
##
## :=
# Resort hotel forecast with prophet.
# Rename the date / value columns to the `ds` / `y` names prophet works with.
prophet.resort.df = select(anomaly.hotel.resort, ds = arrival.date, y = total.bookings)
# Fit the model on the renamed data.
prophet.resort.mdl = prophet(prophet.resort.df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Build the data frame of dates to predict: history plus the forecast horizon.
prophet.resort.future.df = make_future_dataframe(
  prophet.resort.mdl,
  periods = 28, # next 4 wks
  freq = 'day',
  include_history = TRUE
)
# Predict over those dates.
prophet.resort.forecast.df = predict(prophet.resort.mdl, prophet.resort.future.df)
# Show the first rows of the forecast as an interactive table.
DT::datatable(head(prophet.resort.forecast.df))
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Plot the forecast components of the resort hotel model.
prophet.resort.mdl %>% prophet_plot_components(prophet.resort.forecast.df)
# This call was fused onto the end of the previous statement, which does not
# parse; it belongs on its own line. prophet() is already called earlier in the
# script, so the attach is redundant here but harmless.
library(prophet)
# City hotel forecast with prophet.
# Rename the date / value columns to the `ds` / `y` names prophet works with.
prophet.city.df = select(anomaly.hotel.city, ds = arrival.date, y = total.bookings)
# Fit the model on the renamed data.
prophet.city.mdl = prophet(prophet.city.df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Build the data frame of dates to predict: history plus the forecast horizon.
prophet.city.future.df = make_future_dataframe(
  prophet.city.mdl,
  periods = 28, # next 4 wks
  freq = 'day',
  include_history = TRUE
)
# Predict over those dates.
prophet.city.forecast.df = predict(prophet.city.mdl, prophet.city.future.df)
# Show the first rows of the forecast as an interactive table.
DT::datatable(head(prophet.city.forecast.df))